library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1          ✔ purrr   0.3.3     
## ✔ tibble  2.1.3          ✔ stringr 1.4.0.9000
## ✔ readr   1.3.1          ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(reshape)
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## The following object is masked from 'package:dplyr':
## 
##     rename
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt 


import math
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler 

from scipy import stats
# Working directory holding the raw per-day "Rs*" price files.
# NOTE(review): setwd() hard-codes a machine-specific path; a
# project-relative path (e.g. via here::here()) would be more portable.
setwd("~/Desktop/rsdata/")
# Build the vector of daily file names: "Rs<firstday>" .. "Rs<lastday>".
#
# firstday: integer, number of the first Rs file (inclusive).
# lastday:  integer, number of the last Rs file (inclusive).
# Returns a character vector of length lastday - firstday + 1.
gen <- function(firstday, lastday) {
  # paste0() is vectorized, so no loop is needed. This also fixes the
  # original recycling bug: rep("Rs", lastday) had length `lastday` while
  # the sequence had length lastday - firstday + 1, which produced
  # duplicated names whenever firstday != 1.
  paste0("Rs", seq(firstday, lastday, by = 1))
}
# First day = 1, last day = last Rs number in the file set.
sample <- gen(firstday = 1, lastday = 53)

# Read each day's file, keep only the item-name and price columns, and
# give them consistent names. The list is preallocated instead of grown.
mylist <- vector("list", length(sample))
for (i in seq_along(sample)) {
  day_data <- read.csv(sample[i])
  # Columns 1, 2, 4, 5 appear to be scrape metadata; the two remaining
  # columns are the item name and its price -- TODO confirm against a
  # raw Rs file.
  day_data <- day_data[, -c(1, 2, 4, 5)]
  colnames(day_data) <- c("Item", "Price")
  mylist[[i]] <- day_data
}
# Attach the calendar date and weekday name to each day's table.
# One file per day: 2020-08-16 through 2020-10-07 inclusive (53 days).
date <- seq(as.Date("2020-08-16"), as.Date("2020-10-07"), by = "days")
for (i in seq_along(date)) {
  mylist[[i]]$date <- date[i]
  # date[i] is already a Date, so the original as.Date() wrapper was
  # redundant.
  mylist[[i]]$day <- weekdays(date[i])
}
# Merging
# merge_recurse() (package 'reshape') recursively merges the list of
# per-day data frames into one long data frame on their shared columns.
data <- merge_recurse(mylist)
# Export the combined table; row.names = FALSE keeps the CSV clean.
write.csv(data,"RunescapeItems.csv", row.names = FALSE)
# Import
# Prices of 1,000+ gp contain a grouping comma (e.g. "69,061"), which
# col_double() rejects -- this previously produced 5133 parsing failures
# and NA prices. col_number() strips grouping marks, so every row parses.
rune <- read_csv(
  "RunescapeItems.csv",
  col_types = cols(
    Item = col_character(),
    Price = col_number(),
    date = col_date(format = ""),
    day = col_character()
  )
)
# Single-item view: Abyssal whip price over time.
# Inside filter() columns are referenced bare; the original rune$Item
# bypasses dplyr's data masking and breaks under grouping.
check <- rune %>% filter(Item == "Abyssal whip")

p <- ggplot(check) + aes(date, Price) +
  geom_line(linetype = "dashed") +
  geom_point(color = "deepskyblue", size = 1) +
  labs(title = "Abyssal whip", x = "Date", y = "Price") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version of the same plot.
ggplotly(p)
# Corr: pairwise plots of all columns for the single item.
ggpairs(check)

# Keep only the columns the Python model needs (Price, date). Selecting
# by name is safer than the original positional drop (-c(1, 4)), which
# silently breaks if the column order ever changes.
ab <- check[, c("Price", "date")]
write.csv(ab, "whip.csv", row.names = FALSE)
# Load the exported whip prices. The date column is parsed and used as
# the index so calendar features can be derived from it below.
df = pd.read_csv("whip.csv", parse_dates=["date"], index_col="date")
df.head()
# Feature engineering: derive calendar features from the DatetimeIndex.
# (Per the sample output, hour is always 0 -- the data is daily.)
calendar_features = {
    "hour": df.index.hour,
    "day_of_month": df.index.day,
    "day_of_week": df.index.dayofweek,
    "month": df.index.month,
}
for name, values in calendar_features.items():
    df[name] = values
df.head()
# Training / testing split: first 60% of the series for training, the
# remainder for testing. No shuffling -- this is a time series.
train_size = int(len(df) * 0.60)
test_size = len(df) - train_size
# .copy() gives each split its own frame; the original assigned through
# iloc views, so the scaling below triggered SettingWithCopyWarning and
# could silently fail to write.
train = df.iloc[0:train_size].copy()
test = df.iloc[train_size:len(df)].copy()
print(len(train), len(test))

f_columns = ['Price']

# RobustScaler centers on the median and scales by the IQR, making it
# less sensitive to price spikes than min-max scaling.
f_transformer = RobustScaler()

# Fit on the training prices only, then apply the same transform to both
# splits so no test-set information leaks into the scaling.
f_transformer = f_transformer.fit(train[f_columns].to_numpy())

train.loc[:, f_columns] = f_transformer.transform(
    train[f_columns].to_numpy()
)
test.loc[:, f_columns] = f_transformer.transform(
    test[f_columns].to_numpy()
)
def create_dataset(X, y, time_steps=1):
    """Slice a feature frame and target series into overlapping windows.

    X : pandas DataFrame of features, in time order.
    y : pandas Series of targets aligned row-for-row with X.
    time_steps : window length; each sample is `time_steps` consecutive
        rows of X and its target is the y value immediately after the
        window.

    Returns (Xs, ys) as numpy arrays of shapes
    (len(X) - time_steps, time_steps, n_features) and (len(X) - time_steps,).
    """
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        # Rows [i, i + time_steps) predict the value at i + time_steps.
        # .to_numpy() is the recommended replacement for .values.
        Xs.append(X.iloc[i:(i + time_steps)].to_numpy())
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)
# Use a 7-day look-back window: each sample is the previous 7 rows and
# the target is the following day's (scaled) price.
time_steps = 7

X_train, y_train = create_dataset(train, train["Price"], time_steps=time_steps)
X_test, y_test = create_dataset(test, test["Price"], time_steps=time_steps)
print(X_train.shape, y_train.shape)
model = keras.Sequential()
# A bidirectional LSTM reads the 7-day window in both directions;
# units is the number of LSTM cells per direction.
model.add(
    keras.layers.Bidirectional(
        keras.layers.LSTM(
            units=128,
            input_shape=(X_train.shape[1], X_train.shape[2])
        )
    )
)

# Dropout regularizes the network (penalizes overly complex fits).
model.add(keras.layers.Dropout(rate=0.1))
# Single fully connected output unit for the regression target.
# The original used activation='relu' here, which clamps predictions to
# >= 0 -- but RobustScaler centers prices on the median, so roughly half
# the scaled targets are negative and could never be predicted. The
# default linear activation fixes that.
model.add(keras.layers.Dense(units=1))
# MSE loss with the Adam optimizer. The original also tracked 'acc',
# which is meaningless for regression, so it is dropped.
model.compile(loss='mean_squared_error', optimizer='adam')
# Train on the windowed samples. shuffle=False preserves temporal order;
# validation_split holds out the last 40% of the training windows
# (Keras takes the validation slice from the end, before shuffling).
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    batch_size=25,
    validation_split=0.40,
    shuffle=False,
)

# Training vs validation loss curves.
for key in ("loss", "val_loss"):
    plt.plot(history.history[key], label=key)
plt.legend()
plt.show()

# Predicting on the testing data 
y_pred = model.predict(X_test)
y_train_inv = f_transformer.inverse_transform(y_train.reshape(-1,1))
y_test_inv = f_transformer.inverse_transform(y_test.reshape(-1,1))
y_pred_inv = f_transformer.inverse_transform(y_pred)
plt.plot(y_test_inv.flatten(), marker = '.', label = 'True')
plt.plot(y_pred_inv.flatten(),'r', marker = '.', label = 'Predicted')
plt.title('Abyssal whip')
plt.legend()
plt.xlim([0, 15])
## (0.0, 15.0)
plt.ylim([70000, 80000])
## (70000.0, 80000.0)
plt.show();

plt.plot(np.arange(0, len(y_train)), y_train_inv.flatten(), 'g', label="history")
plt.plot(np.arange(len(y_train), len(y_train) + len(y_test)),
y_test_inv.flatten(), marker='+', label="true")
plt.plot(np.arange(len(y_train), len(y_train) + len(y_test)), 
y_pred_inv.flatten(), 'r', label="prediction")
plt.legend()
plt.title('Abyssal whip')
plt.show();